﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using LingDong.NaiveCrawler;
using NLog;

namespace LingDong.NaiveCrawlerExamples
{
    /// <summary>
    /// Console driver that runs one of several hard-coded NaiveCrawler jobs.
    /// The job to run is selected by (un)commenting the calls in <c>Main</c>.
    /// </summary>
    class Program
    {
        // Initialised once at type initialisation so every helper can log
        // regardless of the order in which Main invokes them.
        private static readonly Logger logger = LogManager.GetCurrentClassLogger();

        // File handed to Crawl.DownloadCataloguePage by the news and Tianya jobs.
        private static readonly string outputFile = @"F:\LingDongData\WebPage\Forum\links.txt";

        // First value substituted into a catalogue URL pattern by SubForum.
        // NOTE(review): 20 looks like a leftover "resume from page 20" setting;
        // any caller passing count <= 20 downloads nothing. Confirm intended start.
        private const int FirstPageIndex = 20;

        // Pause after each catalogue page fetched by SubForum.
        private const int PageDelayMilliseconds = 1000;

        // Pause after each board fetched by TianyaForum.
        private const int ForumDelayMilliseconds = 2000;

        /// <summary>
        /// Entry point. Runs the single job that is currently enabled.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Only one job is enabled at a time; the others are kept here,
            // commented out, so the active job can be switched by editing this list.

            //TianyaForum();

            //SinaNews();

            //QQNews();

            //News163();

            //IFengNews();

            //SohuNews();

            //DownloadHub();

            DownloadHubPage();
        }

        /// <summary>
        /// Passes the hub link file and the hub output directory to
        /// <c>Crawl.DownloadUrlListFromFile</c>. The input path is the same file
        /// that <c>DownloadHub</c> passes to the crawler as its output.
        /// </summary>
        private static void DownloadHubPage()
        {
            string inputPath = @"F:\LingDongData\Classify\output.txt";
            string outputPath = @"F:\LingDongData\Classify\Hub\";
            Crawl.DownloadUrlListFromFile(inputPath, outputPath);
        }

        /// <summary>
        /// Calls <c>Crawl.DownloadCataloguePage</c> once for each web-directory
        /// ("hub") site, using the site URL as both the page and the prefix argument,
        /// and logs each site URL after its call returns.
        /// </summary>
        private static void DownloadHub()
        {
            string output = @"F:\LingDongData\Classify\output.txt";
            List<string> webList = new List<string>() { 
                @"http://www.hao123.com/",
                @"http://www.114la.com/",
                @"http://www.265.com/",
                @"http://hao.qq.com/",
                @"http://www.wndhw.com/",
                @"http://123.sogou.com/",
                @"http://dir.iask.com/",
                @"http://www.5566.net/",
            };
            foreach (string web in webList)
            {
                Crawl.DownloadCataloguePage(web, web, output);
                logger.Debug(web);
            }
        }

        #region news

        /// <summary>
        /// Runs <c>SubForum</c> over the Sohu news catalogue patterns.
        /// </summary>
        private static void SohuNews()
        {
            string prefix = @"http://news.sohu.com/201";

            // International headlines
            SubForum(@"http://news.sohu.com/1/0903/62/subject212846267_21{0}.shtml", prefix, 99);

            // Overseas panorama
            SubForum(@"http://news.sohu.com/s2005/guojihuabian_10{0}.shtml", prefix, 40);
        }

        /// <summary>
        /// Runs <c>SubForum</c> over the iFeng news-photo catalogue pattern.
        /// </summary>
        private static void IFengNews()
        {
            string prefix = @"http://news.ifeng.com/photo/news/detail_";

            // News photos
            SubForum(@"http://news.ifeng.com/photo/news/list_0/{0}.shtml", prefix, 50);
        }
        
        /// <summary>
        /// Runs <c>SubForum</c> over the 163.com news catalogue patterns.
        /// </summary>
        private static void News163()
        {
            // NOTE(review): every call below passes count = 6, which is below the
            // start index used by SubForum (20), so none of them fetches a page.
            string prefix = @"http://news.163.com/1";

            // Society news
            SubForum(@"http://news.163.com/special/00013BUR/shehuibj_0{0}.html", prefix, 6);
            // Anti-corruption
            SubForum(@"http://news.163.com/special/00013C0B/shiyongxw_0{0}.html", prefix, 6);
            // Regional news
            SubForum(@"http://news.163.com/special/00013C0B/gedixw_0{0}.html", prefix, 6);
            // Society and law
            SubForum(@"http://news.163.com/special/00013BUR/shehuiyf_0{0}.html", prefix, 6);
            // International news
            SubForum(@"http://news.163.com/special/00013C0O/guojibjtj_0{0}.html", prefix, 6);
            // Global economy
            SubForum(@"http://news.163.com/special/00013C0O/huanqiujj_0{0}.html", prefix, 6);
        }

        /// <summary>
        /// Runs <c>SubForum</c> over the QQ news catalogue patterns.
        /// </summary>
        private static void QQNews()
        {
            string prefix = @"http://news.qq.com/a/";

            // Practical news
            SubForum(@"http://news.qq.com/newsgn/syxw/shiyongxinwen_{0}.htm", prefix, 50);
            // Current political news
            SubForum(@"http://news.qq.com/newsgn/zhxw/shizhengxinwen_{0}.htm", prefix, 50);
            // Society panorama
            SubForum(@"http://news.qq.com/newssh/shwx/shehuiwanxiang_{0}.htm", prefix, 50);
            // Strange and amusing stories
            SubForum(@"http://news.qq.com/l/newssh/qwqs/qiwenqushi_{0}.htm", prefix, 50);
        }

        /// <summary>
        /// Runs <c>SubForum</c> over the Sina rolling-news catalogue patterns.
        /// </summary>
        private static void SinaNews()
        {
            string prefix = @"http://news.sina.com.cn/c/";

            // Regional news
            SubForum(@"http://roll.news.sina.com.cn/news/gnxw/gdxw1/index_{0}.shtml", prefix, 50);
            // Hong Kong, Macao and Taiwan news
            SubForum(@"http://roll.news.sina.com.cn/news/gnxw/gatxw/index_{0}.shtml", prefix, 50);
            // Political headlines
            SubForum(@"http://roll.news.sina.com.cn/news/gnxw/szyw/index_{0}.shtml", prefix, 50);
            // Overview and analysis
            SubForum(@"http://roll.news.sina.com.cn/news/gnxw/zs-pl/index_{0}.shtml", prefix, 50);
        }

        #endregion


        #region Tianya

        /// <summary>
        /// Calls <c>Crawl.DownloadCataloguePage</c> for each Tianya board listing,
        /// pairing every listing URL with the content prefix at the same index,
        /// and pauses after each call.
        /// </summary>
        private static void TianyaForum()
        {
            // urlList[i] and prefixList[i] describe the same board; keep both
            // lists in the same order and of the same length.
            List<string> urlList = new List<string>() 
            { 
                @"http://www.tianya.cn/publicforum/articleslist/0/no04.shtml",
                @"http://www.tianya.cn/publicforum/articleslist/0/fansunion.shtml",
                @"http://www.tianya.cn/publicforum/articleslist/0/tianyamyself.shtml",
                @"http://www.tianya.cn/techforum/articleslist/0/607.shtml",
                @"http://www.tianya.cn/techforum/articleslist/0/650.shtml",
            };

            List<string> prefixList = new List<string>()
            {
                @"http://www.tianya.cn/publicforum/content/no04/1/",
                @"http://www.tianya.cn/publicforum/content/fansunion/1/",
                @"http://www.tianya.cn/publicforum/content/tianyamyself/1/",
                @"http://www.tianya.cn/techforum/content/607/1/",
                @"http://www.tianya.cn/techforum/content/650/1/",
            };

            int count = urlList.Count;
            for (int i = 0; i < count; i++)
            {
                Crawl.DownloadCataloguePage(urlList[i], prefixList[i], outputFile);

                System.Threading.Thread.Sleep(ForumDelayMilliseconds);
            }
        }

        #endregion


        /// <summary>
        /// For each enabled site name, passes <c>{site}.txt</c> and the
        /// <c>{site}\</c> directory under the Forum data folder to
        /// <c>Crawl.DownloadUrlListFromFile</c>.
        /// </summary>
        private static void DownloadHTML()
        {
            // Only "ifeng" is enabled; the other site names are kept in the
            // comment below so they can be re-added.
            List<string> webList = new List<string>() { //"sina", "163", "qq", 
                "ifeng" };
            foreach (string web in webList)
            {
                string inputPath = String.Format(@"F:\LingDongData\WebPage\Forum\{0}.txt", web);
                string outputPath = String.Format(@"F:\LingDongData\WebPage\Forum\{0}\", web);
                Crawl.DownloadUrlListFromFile(inputPath, outputPath);
            }
        }

        /// <summary>
        /// Walks the numbered pages of one catalogue and hands each page URL to
        /// <c>Crawl.DownloadCataloguePage</c>, logging the URL first and pausing
        /// after each page.
        /// </summary>
        /// <param name="pattern">Composite format string for the catalogue page URL; <c>{0}</c> receives the page number.</param>
        /// <param name="prefix">Passed unchanged to the crawler as its prefix argument.</param>
        /// <param name="count">Exclusive upper bound of the page number; pages run from the start index (20) up to <c>count - 1</c>.</param>
        private static void SubForum(string pattern, string prefix, int count)
        {
            // Passed unchanged to the crawler; presumably a filter on link text
            // ("图)" reads as "picture)") - confirm against Crawl.DownloadCataloguePage.
            string innerText = @"图)";
            for (int page = FirstPageIndex; page < count; page++)
            {
                string url = String.Format(pattern, page);
                logger.Debug(url);
                Crawl.DownloadCataloguePage(url, prefix, innerText, outputFile);
                System.Threading.Thread.Sleep(PageDelayMilliseconds);
            }
        }
    }
}
